# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup
import requests
def get_url(url, timeout=10):
    """Download *url* and return its HTML parsed with BeautifulSoup.

    Args:
        url: Address of the page to request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``BeautifulSoup`` tree built with the ``lxml`` parser, or ``None``
        when the request or the parsing fails (the failure is printed).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3877.400 QQBrowser/10.8.4506.400'
    }
    try:
        # Without a timeout a stalled connection would block the crawl forever.
        resp = requests.get(url=url, headers=headers, timeout=timeout)
        # Force UTF-8 decoding of the body instead of relying on the
        # encoding guessed from the response headers.
        resp.encoding = 'UTF-8'
        return BeautifulSoup(resp.text, 'lxml')
    except Exception as err:
        # Best-effort fetch: report which URL failed and why ("程序请求失败"
        # means "request failed"), then signal the failure with None.
        print(f"程序请求失败: {url} ({err})")
        return None
def get_remen_url(html_code):
    """Collect the links of the popular travel destinations from a parsed page.

    Args:
        html_code: Parsed page exposing ``select`` (such as the value
            returned by ``get_url``), or ``None`` if the page could not be
            fetched.

    Returns:
        A dict mapping each matched link's text to its ``href``. The dict is
        empty when ``html_code`` is ``None`` or no link matches. Links without
        an ``href`` attribute are skipped; if two links share the same text,
        the later one wins.
    """
    if html_code is None:
        # get_url signals a failed request with None; there is nothing to parse.
        return {}
    anchors = html_code.select('#root > div > div > div.vacation_bd > div.basefix.festival_logo_frame > div.sidenav_destination.new_sidenav_destination > ul > li:nth-child(1) >div > p > a')
    return {
        anchor.text: anchor.attrs['href']
        for anchor in anchors
        if 'href' in anchor.attrs
    }

# Note: every result listing is assumed to have 100 pages.

def main(url, max_pages=100):
    """Crawl the listing pages and print the element matched on each one.

    The home page at *url* is fetched and its popular-destination links are
    collected. Then, for the first destination only, listing pages
    ``1..max_pages`` are requested and the first element matching the price
    selector is printed (``None`` when nothing matches).

    Args:
        url: Address of the site's home page.
        max_pages: Number of listing pages to request (default 100).

    Returns:
        None. Results are only printed.
    """
    home_page = get_url(url)
    if home_page is None:
        # The home page could not be fetched, so there is nothing to crawl.
        return

    destinations = get_remen_url(home_page)
    for _destination in destinations:
        # NOTE: the listing URL below is hard-coded (``sv`` is the URL-encoded
        # keyword 九寨沟) and does not depend on the destination yet, so the
        # destination's own page is not requested and only one pass is made
        # (see the ``break`` at the end of this loop body).
        for page in range(1, max_pages + 1):
            # Only the page number ``p`` changes between listing pages.
            listing_url = f'https://vacations.ctrip.com/list/whole/sc.html?p={page}&sv=%E4%B9%9D%E5%AF%A8%E6%B2%9F'
            listing = get_url(listing_url)
            if listing is None:
                # Skip pages that failed to download instead of crashing.
                continue
            # Presumably the price element of a product entry, judging by
            # the ``list_sr_price_box`` class in the selector.
            price_tag = listing.select_one('#root > div > div.vacation_bd > div.main_col > div:nth-child(3) > div > div > div.list_product_right > div > div.list_content_right > div.list_sr_price_box.basefix > div > strong')
            print(price_tag)
            # TODO: extract further fields (e.g. the product title) per entry.
        break



# Home page of the site; the crawl starts from here.
URL = 'https://vacations.ctrip.com/'

# Start the crawl only when this file is executed as a script, so that
# importing the module does not trigger network requests.
if __name__ == '__main__':
    main(URL)




